* Session setup -----------------------------------------
* Interpret every command below under Stata 18.0 syntax and semantics.
version 18.0
* Fix the processor count at 8 so results do not depend on how many
* cores the host machine happens to have.
set processors 8
* Start from an empty session: clear what is in memory ...
clear all
* ... and drop macros as well.
macro drop _all
* Fix the random-number seed.
set seed 20220616

*-------------------------------------------------------
* project: daylight saving time (dst)
*-------------------------------------------------------

* Run metadata: identifies this do-file, its author, and when it was
* created and last run. `pgm' is also reused to name the log file here
* and the saved dataset later in the script.
local pgm  "dst-data01_places_2020_release_mountain_pacific_clean"  // file name (no extension)
local who  "Muzhe Yang"                                             // author
local dte  "2022-06-16"                                             // created date
local dte2 "`c(current_date)'"                                      // last run date
local tag  "`pgm'.do, created by `who' on `dte', last run on `dte2'"

* Close any log left open by a previous (possibly aborted) run; -capture-
* suppresses the error raised when no log is open.
capture log close
* Forward slashes are used as directory separators: Stata accepts them on
* Windows, macOS and Linux alike, and they avoid the need to escape a
* backslash that directly precedes a macro reference such as `pgm'.
log using "code/management/`pgm'.txt", replace text
* Stamp the log with the run metadata.
display "`tag'"

* Load the raw PLACES 2020 release workbook (Mountain/Pacific extract).
* -firstrow- takes variable names from the first spreadsheet row and
* -clear- replaces whatever data are in memory.
* Forward slashes keep the path portable across Windows, macOS and Linux.
import excel "data_raw/places/places_2020_release_mountain_pacific.xlsx", ///
    sheet("places_2020_release_mountain_pa") firstrow clear

* Release indicator: every observation in this file gets wave = 1.
generate wave = 1

* Crosswalk for the crude-prevalence measures, written as pairs of
*   <new analysis name>  <stem of the imported column>
* where the imported column is named <stem>_CrudePrev.
local prev_map                                  ///
    lack_health_ins        ACCESS2              ///
    arthritis              ARTHRITIS            ///
    binge                  BINGE                ///
    bp_high                BPHIGH               ///
    bp_med                 BPMED                ///
    cancer                 CANCER               ///
    asthma                 CASTHMA              ///
    cervical_screen        CERVICAL             ///
    heart_disease          CHD                  ///
    checkup                CHECKUP              ///
    chol_screen            CHOLSCREEN           ///
    colon_screen           COLON_SCREEN         ///
    copd                   COPD                 ///
    core_prev_m            COREM                ///
    core_prev_f            COREW                ///
    smoking                CSMOKING             ///
    dental                 DENTAL               ///
    diabetes               DIABETES             ///
    high_chol              HIGHCHOL             ///
    kidney                 KIDNEY               ///
    lack_phy_act           LPA                  ///
    mammo_use              MAMMOUSE             ///
    mental_hlth_distress   MHLTH                ///
    obesity                OBESITY              ///
    phy_hlth_distress      PHLTH                ///
    sleep_depr             SLEEP                ///
    stroke                 STROKE               ///
    teeth_lost             TEETHLOST

* Peel one (new name, stem) pair off the front of the list per iteration
* and copy the imported column into a double-precision variable.
while "`prev_map'" != "" {
    gettoken newname prev_map : prev_map
    gettoken stem    prev_map : prev_map
    generate double `newname' = `stem'_CrudePrev
}

* Geography: copy the imported coordinate and distance columns into
* double-precision variables with analysis-friendly names.
generate double centroid_lat   = Latitude
generate double centroid_lon   = Longitude
generate double dist_to_border = NEAR_DIST

* Time zone: no storage type is given, so the new variable takes the
* type of Zone (presumably a string holding the zone name -- confirm
* against the workbook).
generate time_zone = Zone

* Variable labels ---------------------------------------
* All prevalence measures share the same label suffix; keep it in one
* place so the wording stays uniform.
local cp "crude prevalence (%)"

* Release indicator
label var wave "1 = PLACES 2020 release; 2 = PLACES 2021 release"

* Prevalence measures
label var lack_health_ins      "Lack of health insurance `cp'"
label var arthritis            "Arthritis `cp'"
label var binge                "Binge drinking `cp'"
label var bp_high              "High blood pressure `cp'"
label var bp_med               "Taking BP medication `cp'"
label var cancer               "Cancer (except skin) `cp'"
label var asthma               "Current asthma `cp'"
label var cervical_screen      "Cervical cancer screening `cp'"
label var heart_disease        "Coronary heart disease `cp'"
label var checkup              "Annual checkup `cp'"
label var chol_screen          "Cholesterol screening `cp'"
label var colon_screen         "Colorectal cancer screening `cp'"
label var copd                 "Chronic obstructive pulmonary disease `cp'"
label var core_prev_m          "Core preventive services for older men `cp'"
label var core_prev_f          "Core preventive services for older women `cp'"
label var smoking              "Current smoking `cp'"
label var dental               "Dental visit `cp'"
label var diabetes             "Diabetes `cp'"
label var high_chol            "High cholesterol `cp'"
label var kidney               "Chronic kidney disease `cp'"
label var lack_phy_act         "Physical inactivity `cp'"
label var mammo_use            "Mammography use `cp'"
label var mental_hlth_distress "Frequent mental health distress `cp'"
label var obesity              "Obesity `cp'"
label var phy_hlth_distress    "Frequent physical health distress `cp'"
label var sleep_depr           "Sleep < 7 hours `cp'"
label var stroke               "Stroke `cp'"
label var teeth_lost           "Teeth loss `cp'"

* Geography and time zone
label var centroid_lat   "census tract's centroid's latitude"
label var centroid_lon   "census tract's centroid's longitude"
label var dist_to_border "distance between a census tract's centroid and the time zone border (in meters)"
label var time_zone      "time zone name"

* Final variable list, defined once so that -keep- and -order- can never
* drift apart. Continuation lines are, in order:
*   identifiers; geography and time zone; health outcomes;
*   prevention measures; health-risk behaviours; health status.
local final_vars ///
    wave TractFIPS StateAbbr StateName CountyName CountyFIPS ///
    centroid_lon centroid_lat dist_to_border time_zone ///
    arthritis asthma bp_high cancer copd diabetes heart_disease high_chol kidney obesity stroke teeth_lost ///
    bp_med checkup cervical_screen chol_screen colon_screen core_prev_m core_prev_f dental lack_health_ins mammo_use ///
    binge lack_phy_act sleep_depr smoking ///
    mental_hlth_distress phy_hlth_distress

* Drop every variable not in the list, then arrange columns in list order.
keep  `final_vars'
order `final_vars'

* -stable- keeps tied observations in their existing relative order, so the
* saved row order is reproducible even if TractFIPS were ever duplicated.
sort TractFIPS, stable

* Write a compact variable summary to the log for later inspection.
codebook, compact
* Shrink each variable to the smallest storage type that holds its values
* without loss.
compress
* Forward slashes keep the path portable across Windows, macOS and Linux and
* avoid having to escape a backslash in front of the `pgm' macro.
save "data_clean/places/`pgm'.dta", replace

log close
exit